# #Set memory and load packages
# #invisible(utils::memory.limit(64000))
# pacman::p_load(tidyverse,ggplot2,dplyr,fixest,modelsummary,bookdown,forcats,Hmisc,tibble,modelsummary)
# 
# 
# wd<-('C:/Users/MUNTEANU_A/Dropbox/Research/2018 JMP')
# #wd<-('C:/Users/Andrei/Google Drive/Research/20190300 Romania BAC')
# wd_data<-paste0(wd,'/data/final/')
# setwd(wd_data)
# Start from the raw student-level table; `data_student_raw` is not created in
# this script and must already exist in the workspace.
data_regression <- data_student_raw
# openings<-readRDS('openings_anon')

# Keep only the columns used from here on. Original note: add instruments for
# Heckman, i.e. the proportion of dropouts in your school (presumably the
# drop_* columns -- confirm).
data_regression <- dplyr::select(
  data_regression,
  judet_bac, judet_adm, judet_ms, id_bac, media_la_admitere, id_adm,
  dec_town, entrance_perc, n_hs_town_group, n_students_town_yr, n_hs_town,
  town, an, grad_perc, class_mean, school_mean, school_mean_yr, class_mean_yr,
  school_change, scoala_de_provenienta, unitate_de_invatamant,
  liceu_repartizat, school_harmonized, specializare_bac2, specializare_adm,
  Unemployment_hs_bac, Wages_hs_bac, drop_hs_hs_bac,
  town_hs_bac, Cod_SIRUTA_hs_bac, drop_middle_ms_adm, drop_hs_ms_adm,
  Cod_SIIIR_hs_bac
)
# Release the memory held by the dropped columns.
gc()

data_regression <- as.data.frame(data_regression)

# Label each row by its n_hs_town value: counts below 4 keep their own value
# as text, 4 through 15 become "4-15", anything above 15 becomes "16+".
# Rows with a missing n_hs_town stay NA.
data_regression$n_hs_town_group <- with(
  data_regression,
  ifelse(
    n_hs_town > 15,
    "16+",
    ifelse(n_hs_town >= 4, "4-15", as.character(n_hs_town))
  )
)

# Turn the labels into a factor whose levels are sorted by the mean n_hs_town
# within each label (reorder()'s default summary function).
data_regression$n_hs_town_group <- reorder(
  data_regression$n_hs_town_group,
  data_regression$n_hs_town
)

# Three steps in one pass:
#   1. n_school: number of distinct school_harmonized values that share the
#      same Cod_SIRUTA_hs_bac (a missing value counts as one distinct value);
#   2. fe: "<town>:<scoala_de_provenienta>" key;
#   3. keep only rows with an <= 2019.
# The table is converted back to a plain data.frame after ungrouping.
data_regression <- data_regression %>%
  group_by(Cod_SIRUTA_hs_bac) %>%
  mutate(n_school = n_distinct(school_harmonized)) %>%
  ungroup() %>%
  as.data.frame() %>%
  mutate(fe = paste0(town, ":", scoala_de_provenienta)) %>%
  filter(an <= 2019)

# Bin entrance_perc into four (quart) and ten (dec) classes labelled "1", "2",
# ... Intervals are closed on the left and open on the right (right = FALSE),
# so a value of exactly 0.25 lands in quart "2". Missing values stay NA.
data_regression <- mutate(
  data_regression,
  quart = cut(
    entrance_perc,
    breaks = c(-Inf, 0.25, 0.5, 0.75, Inf),
    labels = as.character(1:4),
    right = FALSE
  ),
  dec = cut(
    entrance_perc,
    breaks = c(-Inf, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, Inf),
    labels = as.character(1:10),
    right = FALSE
  )
)

#' Percentile rank of each element of a vector
#'
#' Each non-missing value is replaced by its rank (ties share the average
#' rank) divided by the number of non-missing values, giving a result in
#' (0, 1]. Missing values are returned as NA and do not count towards the
#' denominator, so they no longer receive a rank or deflate the ranks of the
#' observed values. For input without NAs the result is unchanged:
#' rank(x) / length(x).
#'
#' @param x A vector that `rank()` can order (typically numeric).
#' @return A numeric vector the same length as `x`.
perc.rank <- function(x) {
  rank(x, na.last = "keep") / sum(!is.na(x))
}

# entrance_perc_town: percentile rank (perc.rank) of media_la_admitere within
# each (an, town_hs_bac) cell, computed only over rows whose media_la_admitere
# is not missing. Rows with a missing media_la_admitere keep the NA set here.
data_regression$entrance_perc_town<-NA
# The same row subset is taken on both sides of the assignment, so the ranked
# rows are written back onto the rows they came from.
# NOTE(review): this relies on group_by() %>% mutate() returning rows and
# columns in the same order as the input subset -- confirm.
data_regression[!is.na(data_regression$media_la_admitere),]<-data_regression[!is.na(data_regression$media_la_admitere),] %>%
  group_by(an,town_hs_bac) %>%
  mutate(entrance_perc_town=perc.rank(media_la_admitere)) %>%
  ungroup

# Bin entrance_perc_town into four (quart_town) and ten (dec_town) classes
# labelled "1", "2", ... Intervals are closed on the left and open on the
# right (right = FALSE); missing values stay NA. dec_town overwrites the
# column of the same name carried over from the raw data.
data_regression <- mutate(
  data_regression,
  quart_town = cut(
    entrance_perc_town,
    breaks = c(-Inf, 0.25, 0.5, 0.75, Inf),
    labels = as.character(1:4),
    right = FALSE
  ),
  dec_town = cut(
    entrance_perc_town,
    breaks = c(-Inf, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, Inf),
    labels = as.character(1:10),
    right = FALSE
  )
)


#### Presence table
# One row per (judet_bac, town_hs_bac, Cod_SIIIR_hs_bac) and one column per
# value of `an`. A cell is TRUE when at least one row exists for that
# combination in that year and NA when there is none.
# Cod_SIIIR_hs_bac is presumably the school identifier -- confirm.
school_timeline <- data_regression %>%
  filter(an <= 2019) %>%
  group_by(judet_bac, town_hs_bac, an, Cod_SIIIR_hs_bac) %>%
  summarise(exists = n() > 0) %>%
  # Unused alternative rule:
  #   summarize(exists = sum(!is.na(media_la_admitere)) > 10)
  spread(an, exists)


# Long form of the presence table: one row per
# (judet_bac, town_hs_bac, Cod_SIIIR_hs_bac, an) with exists = 1 where the
# wide table had TRUE and 0 where it had NA.
# The NA -> 0 replacement runs over every column, so missing values in the id
# columns (e.g. town_hs_bac) also become 0.
school_timeline_long <- school_timeline %>%
  ungroup() %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  # Stack every year column. The id columns are excluded by name instead of
  # selecting positions 4:15, so the step does not depend on there being
  # exactly twelve year columns in a fixed position.
  gather(an, exists, -judet_bac, -town_hs_bac, -Cod_SIIIR_hs_bac) %>%
  arrange(town_hs_bac, Cod_SIIIR_hs_bac)
####
# Fill one-year gaps: a year with exists == 0 is set to 1 when both the
# previous and the next row of the same
# (judet_bac, town_hs_bac, Cod_SIIIR_hs_bac) group have exists == 1.
# The result is spread back to one column per year.
#
# Fix: the boundary value for lead() is now passed as `default =`. It was
# previously given positionally, which lead() interprets as `n` (the shift
# size). For groups whose last value is 0 that meant a shift of 0, so the
# condition could never be TRUE and no gap in those groups was ever filled.
#
# NOTE(review): lag()/lead() use the current row order; this assumes rows
# within each group are in year order as produced upstream -- confirm.
school_timeline <- school_timeline_long %>%
  group_by(judet_bac, town_hs_bac, Cod_SIIIR_hs_bac) %>%
  mutate(
    exists = ifelse(
      exists == 0 &
        lag(exists == 1, default = first(exists) == 1) &
        lead(exists == 1, default = last(exists) == 1),
      1,
      exists
    )
  ) %>%
  spread(an, exists) %>%
  arrange(town_hs_bac, Cod_SIIIR_hs_bac)

# Count per (judet_bac, town_hs_bac) and year: the sum of `exists` over all
# Cod_SIIIR_hs_bac rows of the gap-filled presence table, spread to one column
# per year.
schools_town_timeline <- school_timeline %>%
  # Stack every year column. The id columns are excluded by name instead of
  # selecting positions 4:15, so the step does not depend on there being
  # exactly twelve year columns in a fixed position.
  gather(an, exists, -judet_bac, -town_hs_bac, -Cod_SIIIR_hs_bac) %>%
  group_by(judet_bac, town_hs_bac, an) %>%
  summarise(n_hs_town = sum(exists)) %>%
  spread(an, n_hs_town) %>%
  arrange(judet_bac, town_hs_bac)

# Towns whose yearly count in schools_town_timeline equals n_hs in every year
# from 2008 through 2019. Rows with town_hs_bac equal to 0 are dropped.
# n_hs stays a top-level variable and holds 2 once this block has run.
n_hs <- 1
towns_1 <- schools_town_timeline %>%
  filter(
    `2008` == n_hs, `2009` == n_hs, `2010` == n_hs, `2011` == n_hs,
    `2012` == n_hs, `2013` == n_hs, `2014` == n_hs, `2015` == n_hs,
    `2016` == n_hs, `2017` == n_hs, `2018` == n_hs, `2019` == n_hs,
    town_hs_bac != 0
  )

n_hs <- 2
towns_2 <- schools_town_timeline %>%
  filter(
    `2008` == n_hs, `2009` == n_hs, `2010` == n_hs, `2011` == n_hs,
    `2012` == n_hs, `2013` == n_hs, `2014` == n_hs, `2015` == n_hs,
    `2016` == n_hs, `2017` == n_hs, `2018` == n_hs, `2019` == n_hs,
    town_hs_bac != 0
  )




# Attach the columns of `openings` by year (an) and town (town_hs_bac); every
# row of data_regression is kept. `openings` is not created in this script and
# must already exist in the workspace.
data_regression <- data_regression %>%
  dplyr::left_join(openings, by = c("an", "town_hs_bac"))


# Regression report: switch the working directory to the folder of the
# document currently active in RStudio and knit ddd.Rmd from there.
# Needs a running RStudio session (rstudioapi).
current_path <- rstudioapi::getActiveDocumentContext()[["path"]]
setwd(dirname(current_path))
rmarkdown::render(input = "ddd.Rmd", knit_root_dir = getwd())

